#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
DaPars2(Modified): Dynamics analysis of Alternative PolyAdenylation from multiple RNA-seq data.
"""

import argparse
import os

from dapars2.DaPars_Extract_Anno import Annotation_prepar_3UTR_extraction, Subtract_different_strand_overlap, get_chromList
from dapars2.extract_read_depth import extract_read_depth
from dapars2.DaPars2_Multi_Sample_Multi_Chr import De_Novo_3UTR_Identification_Loading_Target_Wig_for_TCGA_Multiple_Samples_Multiple_threads_Main3_shared_list
from dapars2.merge_apa_quant_res_by_chr import merge_apa_res
from dapars2.DaPars_filter import DaPars_Filtering

def DaPars2_main():
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
        )
    base = parser.add_argument_group('base parameters', 'parameters used in DaPars2 base analysis')
    
    base.add_argument('-b', '--bed', dest='gene_bed_file', metavar='gene_bed_file', type=str, required=True, help='Input gene bed file, which is downloaded from UCSC Table browser')
    base.add_argument('-s', '--symbol', dest='gene_symbol_annotation_file', metavar='gene_symbol_annotation_file', type=str, required=True, help='Input gene symbol annotation file, which is downloaded from UCSC Table browser')
    base.add_argument('-w', '--wigfiles',dest="wigfiles", metavar="wigfiles", nargs="+", required=True, help='wig files generated by: "bedtools genomecov -ibam *.bam -bga -split -trackline"')
    base.add_argument('-f', '--flagstat-files',dest="flagstat", metavar="flagstat", nargs="+", required=True, help='flagstat files generated by: "samtools flagstat *.bam", only pair-end support')
    base.add_argument('-t', '--threads', dest="Num_threads", metavar="threads", type=int, default=4, help='Number of threads')
    base.add_argument('-c', '--first-100-3UTR-coverage', dest="first_100_3UTR_Coverage_threshold", metavar="first_100_3UTR_coverage_threshold", type=float, default=10, help='first 100bp 3UTR coverage threshold')
    base.add_argument('--sample-list', dest="sample_list", metavar="sample_list", nargs="+", default=None, type=str, help="sample list")
    base.add_argument('-o', '--outdir', dest="output_folder", metavar="output_folder", type=str, default="DaPars_results", help='output folder')
    
    filt = parser.add_argument_group('filter parameters', 'parameters used in DaPars2 filter analysis, this step will be executed only --treat-index and --control-index are specified')
    
    filt.add_argument("--treat-index", dest="treat_index", nargs="+", default=None, type=int, help="treat sample index, 1 based")
    filt.add_argument("--control-index", dest="control_index", nargs="+", default=None, type=int, help="control sample index, 1 based")
    filt.add_argument("--3UTR-Coverage-cutoff", dest="Coverage_cutoff", type=float, default=30, help="3UTR coverage cutoff")
    filt.add_argument("--Num-least-in-treat", dest="Num_least_in_treat", type=int, default=1, help="Num sample pass 3UTR coverage filter least in treat")
    filt.add_argument("--Num-least-in-control", dest="Num_least_in_control", type=int, default=1, help="Num sample pass 3UTR coverage filter least in control")
    filt.add_argument("--FDR-cutoff", dest="FDR_cutoff", type=float, default=0.05, help="FDR cutoff")
    filt.add_argument("--PDUI-cutoff", dest="PDUI_cutoff", type=float, default=0.2, help="PDUI cutoff")
    filt.add_argument("--Fold-change-cutoff", dest="Fold_change_cutoff", type=float, default=0.58, help="Fold change cutoff")
    
    args = parser.parse_args()
    
    outdir = args.output_folder
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    
    gene_bed_file = args.gene_bed_file
    gene_symbol_annotation_file = args.gene_symbol_annotation_file
    output_final_extract_file = os.path.join(outdir, "3UTR_annotation.bed")
    chromlist_file = os.path.join(outdir, "chromList.txt")
    output_extract_file = os.path.join(outdir,'temp_anno_extracted.bed')
    if not os.path.exists(output_final_extract_file):
        print("Generating regions ...")
        Annotation_prepar_3UTR_extraction(gene_bed_file, gene_symbol_annotation_file,output_extract_file)
        Subtract_different_strand_overlap(output_extract_file,output_final_extract_file)
        get_chromList(output_final_extract_file, chromlist_file)
        try:
            os.remove(output_extract_file)
        except OSError:
            pass
    wigfiles = args.wigfiles
    flagstat = args.flagstat
    read_depth = os.path.join(outdir, "readDepth.txt")
    if not os.path.exists(read_depth):
        print("Extracting read depth ...")
        assert len(wigfiles) == len(flagstat), "wigfiles and flagstat files should be the same length"
        extract_read_depth(wigfiles, flagstat, read_depth)
        
    apa_res = os.path.join(outdir, "DaPars2_res.all_chromosomes.txt")
    if args.sample_list is not None:
        sample_list = args.sample_list
    else:
        sample_list = [os.path.basename(i).split(".")[0] for i in args.wigfiles]
    if not os.path.exists(apa_res):
        print("Calculate PDUI ...")
        outdir_prefix = os.path.join(outdir, "DaPars2_out")
        result_prefix = "DaPars2"
        De_Novo_3UTR_Identification_Loading_Target_Wig_for_TCGA_Multiple_Samples_Multiple_threads_Main3_shared_list(
            chromlist_file, wigfiles, outdir_prefix, output_final_extract_file, result_prefix, read_depth, args.Num_threads, args.first_100_3UTR_Coverage_threshold)
        print("Merge results ...")
        merge_apa_res(outdir_prefix, result_prefix, chromlist_file, sample_list, apa_res)
    
    if args.treat_index is not None and args.control_index is not None:
        print("Filter results ...")
        DaPars_Filtering(apa_res, sample_list, args.treat_index, args.control_index, os.path.join(outdir, "DaPars2_res.Filtered.txt"),
                         args.Coverage_cutoff, args.Num_least_in_treat, args.Num_least_in_control, args.FDR_cutoff, args.PDUI_cutoff, args.Fold_change_cutoff)
    print("Finished!")


if __name__ == '__main__':
    DaPars2_main()
